diabetes_data = read.csv("diabetes_binary.csv")
str(diabetes_data)
'data.frame': 70692 obs. of 22 variables:
$ Diabetes_binary : num 0 0 0 0 0 0 0 0 0 0 ...
$ HighBP : num 1 1 0 1 0 0 0 0 0 0 ...
$ HighChol : num 0 1 0 1 0 0 1 0 0 0 ...
$ CholCheck : num 1 1 1 1 1 1 1 1 1 1 ...
$ BMI : num 26 26 26 28 29 18 26 31 32 27 ...
$ Smoker : num 0 1 0 1 1 0 1 1 0 1 ...
$ Stroke : num 0 1 0 0 0 0 0 0 0 0 ...
$ HeartDiseaseorAttack: num 0 0 0 0 0 0 0 0 0 0 ...
$ PhysActivity : num 1 0 1 1 1 1 1 0 1 0 ...
$ Fruits : num 0 1 1 1 1 1 1 1 1 1 ...
$ Veggies : num 1 0 1 1 1 1 1 1 1 1 ...
$ HvyAlcoholConsump : num 0 0 0 0 0 0 1 0 0 0 ...
$ AnyHealthcare : num 1 1 1 1 1 0 1 1 1 1 ...
$ NoDocbcCost : num 0 0 0 0 0 0 0 0 0 0 ...
$ GenHlth : num 3 3 1 3 2 2 1 4 3 3 ...
$ MentHlth : num 5 0 0 0 0 7 0 0 0 0 ...
$ PhysHlth : num 30 0 10 3 0 0 0 0 0 6 ...
$ DiffWalk : num 0 0 0 0 0 0 0 0 0 0 ...
$ Sex : num 1 1 1 1 0 0 1 1 0 1 ...
$ Age : num 4 12 13 11 8 1 13 6 3 6 ...
$ Education : num 6 6 6 6 5 4 5 4 6 4 ...
$ Income : num 8 8 8 8 8 7 6 3 8 4 ...
cols_skip <- c('BMI', 'GenHlth', 'MentHlth', 'PhysHlth', 'Age', 'Education', 'Income')
cols_skip_indices <- which(names(diabetes_data) %in% cols_skip)
diabetes_data[, -cols_skip_indices] <- lapply(diabetes_data[, -cols_skip_indices], factor)
str(diabetes_data)
'data.frame': 70692 obs. of 22 variables:
$ Diabetes_binary : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ HighBP : Factor w/ 2 levels "0","1": 2 2 1 2 1 1 1 1 1 1 ...
$ HighChol : Factor w/ 2 levels "0","1": 1 2 1 2 1 1 2 1 1 1 ...
$ CholCheck : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ BMI : num 26 26 26 28 29 18 26 31 32 27 ...
$ Smoker : Factor w/ 2 levels "0","1": 1 2 1 2 2 1 2 2 1 2 ...
$ Stroke : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
$ HeartDiseaseorAttack: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ PhysActivity : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 1 2 1 ...
$ Fruits : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 2 2 2 ...
$ Veggies : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 2 2 2 2 ...
$ HvyAlcoholConsump : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
$ AnyHealthcare : Factor w/ 2 levels "0","1": 2 2 2 2 2 1 2 2 2 2 ...
$ NoDocbcCost : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ GenHlth : num 3 3 1 3 2 2 1 4 3 3 ...
$ MentHlth : num 5 0 0 0 0 7 0 0 0 0 ...
$ PhysHlth : num 30 0 10 3 0 0 0 0 0 6 ...
$ DiffWalk : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Sex : Factor w/ 2 levels "0","1": 2 2 2 2 1 1 2 2 1 2 ...
$ Age : num 4 12 13 11 8 1 13 6 3 6 ...
$ Education : num 6 6 6 6 5 4 5 4 6 4 ...
$ Income : num 8 8 8 8 8 7 6 3 8 4 ...
summary(diabetes_data)
Diabetes_binary HighBP HighChol CholCheck BMI Smoker Stroke HeartDiseaseorAttack
0:35346 0:30860 0:33529 0: 1749 Min. :12.00 0:37094 0:66297 0:60243
1:35346 1:39832 1:37163 1:68943 1st Qu.:25.00 1:33598 1: 4395 1:10449
Median :29.00
Mean :29.86
3rd Qu.:33.00
Max. :98.00
PhysActivity Fruits Veggies HvyAlcoholConsump AnyHealthcare NoDocbcCost GenHlth
0:20993 0:27443 0:14932 0:67672 0: 3184 0:64053 Min. :1.000
1:49699 1:43249 1:55760 1: 3020 1:67508 1: 6639 1st Qu.:2.000
Median :3.000
Mean :2.837
3rd Qu.:4.000
Max. :5.000
MentHlth PhysHlth DiffWalk Sex Age Education Income
Min. : 0.000 Min. : 0.00 0:52826 0:38386 Min. : 1.000 Min. :1.000 Min. :1.000
1st Qu.: 0.000 1st Qu.: 0.00 1:17866 1:32306 1st Qu.: 7.000 1st Qu.:4.000 1st Qu.:4.000
Median : 0.000 Median : 0.00 Median : 9.000 Median :5.000 Median :6.000
Mean : 3.752 Mean : 5.81 Mean : 8.584 Mean :4.921 Mean :5.698
3rd Qu.: 2.000 3rd Qu.: 6.00 3rd Qu.:11.000 3rd Qu.:6.000 3rd Qu.:8.000
Max. :30.000 Max. :30.00 Max. :13.000 Max. :6.000 Max. :8.000
proportions(table(diabetes_data$Diabetes_binary))
0 1
0.5 0.5
pie(proportions(table(diabetes_data$Diabetes_binary)), labels = c('Non-Diabetes', 'Diabetes'), col = c('green', 'red'))
From the table and pie chart above, we can see that the dataset is balanced: non-diabetic and diabetic observations each account for exactly 50% of the data.
numerical_variables <- character(0)
categorical_variables <- character(0)
for (c in colnames(diabetes_data))
{
  if (is.numeric(diabetes_data[, c])) {
    numerical_variables <- c(numerical_variables, c)
  } else if (is.factor(diabetes_data[, c])) {
    categorical_variables <- c(categorical_variables, c)
  }
}
cat("Categorical variables:", categorical_variables, "\n","\n")
Categorical variables: Diabetes_binary HighBP HighChol CholCheck Smoker Stroke HeartDiseaseorAttack PhysActivity Fruits Veggies HvyAlcoholConsump AnyHealthcare NoDocbcCost DiffWalk Sex
cat("Numerical variables:", numerical_variables, "\n")
Numerical variables: BMI GenHlth MentHlth PhysHlth Age Education Income
missing_counts <- colSums(is.na(diabetes_data))
cat("Missing Counts in each columns:", "\n")
Missing Counts in each columns:
print(missing_counts)
Diabetes_binary HighBP HighChol CholCheck BMI
0 0 0 0 0
Smoker Stroke HeartDiseaseorAttack PhysActivity Fruits
0 0 0 0 0
Veggies HvyAlcoholConsump AnyHealthcare NoDocbcCost GenHlth
0 0 0 0 0
MentHlth PhysHlth DiffWalk Sex Age
0 0 0 0 0
Education Income
0 0
diabetes_indices <- which(names(diabetes_data) == 'Diabetes_binary')
for (c in colnames(diabetes_data[, -diabetes_indices]))
{
  test_result <- NULL
  if (is.factor(diabetes_data[, c])) {
    try({
      test_result <- chisq.test(diabetes_data$Diabetes_binary, diabetes_data[, c])
      cat('p-value of the chi-square test b/w', c, "and Diabetes is:", test_result$p.value, '\n')
      mosaicplot(diabetes_data$Diabetes_binary ~ diabetes_data[, c], shade = TRUE, main = paste("Mosaic Plot of Diabetes vs", c), xlab = "Diabetes", ylab = c, las = 1)
    })
  }
  else if (is.numeric(diabetes_data[, c])) {
    try({
      test_result <- oneway.test(diabetes_data[, c] ~ diabetes_data$Diabetes_binary)
      cat('p-value of the one-way test b/w', c, "and Diabetes is:", test_result$p.value, '\n')
      # boxplot() needs a formula (numeric ~ group); the original passed the two columns as separate vectors
      boxplot(diabetes_data[, c] ~ diabetes_data$Diabetes_binary, col = '#69b3a2', xlab = "Diabetes", ylab = c, main = paste("Box Plot of Diabetes vs", c))
    })
  }
  if (!is.null(test_result) && test_result$p.value > 0.05) {
    diabetes_data[[c]] <- NULL  # fixed: the original dropped the column from 'housing_data', a leftover from another script
    cat('\n', 'Removing', c, "from dataset as its p-value is greater than 0.05:", test_result$p.value, '\n')
  }
}
p-value of the chi-square test b/w HighBP and Diabetes is: 0
p-value of the chi-square test b/w HighChol and Diabetes is: 0
p-value of the chi-square test b/w CholCheck and Diabetes is: 2.379871e-206
p-value of the one-way test b/w BMI and Diabetes is: 0
p-value of the chi-square test b/w Smoker and Diabetes is: 1.221105e-115
p-value of the chi-square test b/w Stroke and Diabetes is: 1.290837e-243
p-value of the chi-square test b/w HeartDiseaseorAttack and Diabetes is: 0
p-value of the chi-square test b/w PhysActivity and Diabetes is: 0
p-value of the chi-square test b/w Fruits and Diabetes is: 7.967065e-47
p-value of the chi-square test b/w Veggies and Diabetes is: 1.40071e-98
p-value of the chi-square test b/w HvyAlcoholConsump and Diabetes is: 3.913396e-140
p-value of the chi-square test b/w AnyHealthcare and Diabetes is: 7.855834e-10
p-value of the chi-square test b/w NoDocbcCost and Diabetes is: 1.405326e-27
p-value of the one-way test b/w GenHlth and Diabetes is: 0
p-value of the one-way test b/w MentHlth and Diabetes is: 7.117624e-119
p-value of the one-way test b/w PhysHlth and Diabetes is: 0
p-value of the chi-square test b/w DiffWalk and Diabetes is: 0
p-value of the chi-square test b/w Sex and Diabetes is: 3.860396e-32
p-value of the one-way test b/w Age and Diabetes is: 0
p-value of the one-way test b/w Education and Diabetes is: 0
p-value of the one-way test b/w Income and Diabetes is: 0
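All 21 tests come out significant, so no columns are dropped; still, with this many simultaneous comparisons a multiplicity correction is worth a check. A minimal sketch using base R's p.adjust() (the vector below is an illustrative subset of the p-values printed above):
pvals <- c(Fruits = 7.967065e-47, NoDocbcCost = 1.405326e-27, AnyHealthcare = 7.855834e-10)
p.adjust(pvals, method = "bonferroni", n = 21)  # all remain far below 0.05 even after correcting for 21 tests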
library(caret)
Loading required package: ggplot2
Loading required package: lattice
set.seed(1)  # for a reproducible split
partition_indices <- createDataPartition(diabetes_data$Diabetes_binary, p = 0.8, list = FALSE)
diabetes_train_data <- diabetes_data[partition_indices, ]
head(diabetes_train_data)
diabetes_test_data <- diabetes_data[-partition_indices, ]
head(diabetes_test_data)
true_labels = diabetes_test_data$Diabetes_binary
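createDataPartition() stratifies on the outcome, so the 50/50 class balance should carry over into both splits; a quick sanity check (sketch):
proportions(table(diabetes_train_data$Diabetes_binary))
proportions(table(diabetes_test_data$Diabetes_binary))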
library(caret)
tr <- trainControl(method = "cv", number = 5)  # avoid naming this object 'trainControl', which shadows the caret function
knn_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "knn", trControl = tr)
knn_predictions <- predict(knn_model, newdata = diabetes_test_data)
knn_confusion_matrix_class0 <- confusionMatrix(knn_predictions, true_labels, mode='everything')
print(knn_confusion_matrix_class0)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 4862 1689
1 2207 5380
Accuracy : 0.7244
95% CI : (0.717, 0.7318)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.4489
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.6878
Specificity : 0.7611
Pos Pred Value : 0.7422
Neg Pred Value : 0.7091
Precision : 0.7422
Recall : 0.6878
F1 : 0.7140
Prevalence : 0.5000
Detection Rate : 0.3439
Detection Prevalence : 0.4634
Balanced Accuracy : 0.7244
'Positive' Class : 0
knn_confusion_matrix_class1 <- confusionMatrix(knn_predictions, true_labels, positive = '1', mode='everything')
print(knn_confusion_matrix_class1)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 4862 1689
1 2207 5380
Accuracy : 0.7244
95% CI : (0.717, 0.7318)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.4489
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.7611
Specificity : 0.6878
Pos Pred Value : 0.7091
Neg Pred Value : 0.7422
Precision : 0.7091
Recall : 0.7611
F1 : 0.7342
Prevalence : 0.5000
Detection Rate : 0.3805
Detection Prevalence : 0.5366
Balanced Accuracy : 0.7244
'Positive' Class : 1
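caret tunes k over a small default grid; the value it settled on can be read off the fitted object (sketch):
knn_model$bestTune  # the selected number of neighbors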
library(glmnet)
Loading required package: Matrix
Loaded glmnet 4.1-6
library(caret)
set.seed(1)
tr <- trainControl(method = "cv", number = 5)  # note: 'preProc' is not a trainControl() argument, so the original preProc = "nzv" was silently ignored (hence "No pre-processing" below); near-zero-variance filtering would be requested via train(..., preProcess = "nzv")
tg <- expand.grid(alpha = 1, lambda = 10^seq(-4, -2, length = 100))
lasso_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg)
lasso_model
glmnet
56554 samples
21 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243
Resampling results across tuning parameters:
lambda Accuracy Kappa
[101-row tuning grid condensed: accuracy was nearly flat across lambda in [1e-4, 1e-2], ranging from 0.7459420 to a peak of 0.7473743 (Kappa 0.4947486) at lambda = 0.0035938]
Tuning parameter 'alpha' was held constant at a value of 1
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were alpha = 1 and lambda = 0.003593814.
lasso_predictions <- predict(lasso_model, newdata = diabetes_test_data, na.action = na.pass)
lasso_confusion_matrix_class0 <- confusionMatrix(lasso_predictions, true_labels, mode='everything')
print(lasso_confusion_matrix_class0)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5176 1626
1 1893 5443
Accuracy : 0.7511
95% CI : (0.7439, 0.7582)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5022
Mcnemar's Test P-Value : 7.323e-06
Sensitivity : 0.7322
Specificity : 0.7700
Pos Pred Value : 0.7610
Neg Pred Value : 0.7420
Precision : 0.7610
Recall : 0.7322
F1 : 0.7463
Prevalence : 0.5000
Detection Rate : 0.3661
Detection Prevalence : 0.4811
Balanced Accuracy : 0.7511
'Positive' Class : 0
cat("\n\n")
lasso_confusion_matrix_class1 <- confusionMatrix(lasso_predictions, true_labels, mode='everything', positive='1')
print(lasso_confusion_matrix_class1)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5176 1626
1 1893 5443
Accuracy : 0.7511
95% CI : (0.7439, 0.7582)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5022
Mcnemar's Test P-Value : 7.323e-06
Sensitivity : 0.7700
Specificity : 0.7322
Pos Pred Value : 0.7420
Neg Pred Value : 0.7610
Precision : 0.7420
Recall : 0.7700
F1 : 0.7557
Prevalence : 0.5000
Detection Rate : 0.3850
Detection Prevalence : 0.5189
Balanced Accuracy : 0.7511
'Positive' Class : 1
cat("\n\n")
library(glmnet)
library(caret)
set.seed(1)
tr <- trainControl(method = "cv", number = 5)  # as above, preProc = "nzv" is not a trainControl() argument and was a no-op
tg <- expand.grid(alpha = 0, lambda = 10^seq(-3, -1, length = 100))
ridge_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg)
ridge_model
glmnet
56554 samples
21 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243
Resampling results across tuning parameters:
lambda Accuracy Kappa
[100-row tuning grid condensed: accuracy was constant at 0.7463664 for lambda up to about 0.02, peaked at 0.7465963 (Kappa 0.4931927) at lambda = 0.0225702, and drifted down to about 0.7456 by lambda = 0.1]
Tuning parameter 'alpha' was held constant at a value of 0
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were alpha = 0 and lambda = 0.0225702.
ridge_predictions <- predict(ridge_model, newdata = diabetes_test_data, na.action = na.pass)
ridge_confusion_matrix_class0 <- confusionMatrix(ridge_predictions, true_labels, mode='everything')
print(ridge_confusion_matrix_class0)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5184 1646
1 1885 5423
Accuracy : 0.7502
95% CI : (0.743, 0.7574)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5005
Mcnemar's Test P-Value : 6.196e-05
Sensitivity : 0.7333
Specificity : 0.7672
Pos Pred Value : 0.7590
Neg Pred Value : 0.7421
Precision : 0.7590
Recall : 0.7333
F1 : 0.7460
Prevalence : 0.5000
Detection Rate : 0.3667
Detection Prevalence : 0.4831
Balanced Accuracy : 0.7502
'Positive' Class : 0
cat("\n\n")
ridge_confusion_matrix_class1 <- confusionMatrix(ridge_predictions, true_labels, mode='everything', positive='1')
print(ridge_confusion_matrix_class1)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5184 1646
1 1885 5423
Accuracy : 0.7502
95% CI : (0.743, 0.7574)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5005
Mcnemar's Test P-Value : 6.196e-05
Sensitivity : 0.7672
Specificity : 0.7333
Pos Pred Value : 0.7421
Neg Pred Value : 0.7590
Precision : 0.7421
Recall : 0.7672
F1 : 0.7544
Prevalence : 0.5000
Detection Rate : 0.3836
Detection Prevalence : 0.5169
Balanced Accuracy : 0.7502
'Positive' Class : 1
cat("\n\n")
library(glmnet)
library(caret)
set.seed(1)
tr <- trainControl(method = "cv", number = 5)
tg <- expand.grid(alpha = seq(0, 1, length = 10), lambda = 10^seq(-3, 1, length = 100))
enet_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "glmnet", trControl = tr, tuneGrid = tg)
enet_model
glmnet
56554 samples
21 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243
Resampling results across tuning parameters:
alpha lambda Accuracy Kappa
[1000-row tuning grid condensed (the original printout stopped at getOption("max.print") with 750 rows omitted): for small lambda, accuracy stayed near 0.747 at every alpha; for alpha = 0.1111 the penalty shrank all coefficients away once lambda exceeded about 1.9, collapsing accuracy to the no-information rate (0.4999823, Kappa 0)]
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were alpha = 0.7777778 and lambda = 0.004430621.
enet_predictions <- predict(enet_model, newdata = diabetes_test_data, na.action = na.pass)
enet_confusion_matrix_class0 <- confusionMatrix(enet_predictions, true_labels, mode='everything')
print(enet_confusion_matrix_class0)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5175 1626
1 1894 5443
Accuracy : 0.751
95% CI : (0.7438, 0.7581)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5021
Mcnemar's Test P-Value : 6.786e-06
Sensitivity : 0.7321
Specificity : 0.7700
Pos Pred Value : 0.7609
Neg Pred Value : 0.7419
Precision : 0.7609
Recall : 0.7321
F1 : 0.7462
Prevalence : 0.5000
Detection Rate : 0.3660
Detection Prevalence : 0.4810
Balanced Accuracy : 0.7510
'Positive' Class : 0
cat("\n\n")
enet_confusion_matrix_class1 <- confusionMatrix(enet_predictions, true_labels, mode='everything', positive='1')
print(enet_confusion_matrix_class1)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5175 1626
1 1894 5443
Accuracy : 0.751
95% CI : (0.7438, 0.7581)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5021
Mcnemar's Test P-Value : 6.786e-06
Sensitivity : 0.7700
Specificity : 0.7321
Pos Pred Value : 0.7419
Neg Pred Value : 0.7609
Precision : 0.7419
Recall : 0.7700
F1 : 0.7557
Prevalence : 0.5000
Detection Rate : 0.3850
Detection Prevalence : 0.5190
Balanced Accuracy : 0.7510
'Positive' Class : 1
cat("\n\n")
library(caret)
set.seed(1)
tr <- trainControl(method = "cv", number = 5)
rf_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "rf", trControl = tr, importance = TRUE)
rf_model
Random Forest
56554 samples
21 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243
Resampling results across tuning parameters:
mtry Accuracy Kappa
2 0.7489127 0.4978257
11 0.7335291 0.4670583
21 0.7281536 0.4563074
Accuracy was used to select the optimal model using the largest value.
The final value used for the model was mtry = 2.
varImp(rf_model)
rf variable importance
only 20 most important variables shown (out of 21)
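The importance scores are usually easier to compare in a plot than in the printed table (sketch; plot() on a varImp object with a top argument is standard caret usage):
plot(varImp(rf_model), top = 10)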
rf_predictions <- predict(rf_model, newdata = diabetes_test_data, na.action = na.pass)
rf_confusion_matrix_class0 <- confusionMatrix(rf_predictions, true_labels, mode='everything')
print(rf_confusion_matrix_class0)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5241 1196
1 1828 5873
Accuracy : 0.7861
95% CI : (0.7793, 0.7928)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5722
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.7414
Specificity : 0.8308
Pos Pred Value : 0.8142
Neg Pred Value : 0.7626
Precision : 0.8142
Recall : 0.7414
F1 : 0.7761
Prevalence : 0.5000
Detection Rate : 0.3707
Detection Prevalence : 0.4553
Balanced Accuracy : 0.7861
'Positive' Class : 0
cat("\n\n")
rf_confusion_matrix_class1 <- confusionMatrix(rf_predictions, true_labels, mode='everything', positive='1')
print(rf_confusion_matrix_class1)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 4975 1436
1 2094 5633
Accuracy : 0.7503
95% CI : (0.7431, 0.7574)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5006
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.7969
Specificity : 0.7038
Pos Pred Value : 0.7290
Neg Pred Value : 0.7760
Precision : 0.7290
Recall : 0.7969
F1 : 0.7614
Prevalence : 0.5000
Detection Rate : 0.3984
Detection Prevalence : 0.5465
Balanced Accuracy : 0.7503
'Positive' Class : 1
cat("\n\n")
library(caret)
set.seed(1)
tr <- trainControl(method = "cv", number = 5)
gbm_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "gbm", trControl = tr)
Iter TrainDeviance ValidDeviance StepSize Improve
[verbose boosting log condensed: 16 runs (5 folds x 3 interaction depths, plus the final refit) of up to 150 iterations each; train deviance fell from about 1.36 to about 1.00 while the per-iteration improvement shrank toward 0]
gbm_model
Stochastic Gradient Boosting
56554 samples
21 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243
Resampling results across tuning parameters:
interaction.depth n.trees Accuracy Kappa
1 50 0.7416098 0.4832197
1 100 0.7476218 0.4952437
1 150 0.7493016 0.4986033
2 50 0.7474626 0.4949254
2 100 0.7506100 0.5012201
2 150 0.7511582 0.5023165
3 50 0.7493369 0.4986740
3 100 0.7516710 0.5033421
3 150 0.7522368 0.5044738
Tuning parameter 'shrinkage' was held constant at a value of 0.1
Tuning parameter 'n.minobsinnode' was held constant at a value of 10
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were n.trees = 150, interaction.depth = 3, shrinkage = 0.1 and n.minobsinnode = 10.
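gbm's own summary() reports each predictor's relative influence; on the caret wrapper it can be reached through the final model (a sketch, assuming the gbm package's summary method; note the names refer to the dummy-coded model-matrix columns):
summary(gbm_model$finalModel, plotit = FALSE)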
gbm_predictions <- predict(gbm_model, newdata = diabetes_test_data, na.action = na.pass)
gbm_confusion_matrix_class0 <- confusionMatrix(gbm_predictions, true_labels, mode='everything')
print(gbm_confusion_matrix_class0)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5026 1512
1 2043 5557
Accuracy : 0.7486
95% CI : (0.7413, 0.7557)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.4971
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.7110
Specificity : 0.7861
Pos Pred Value : 0.7687
Neg Pred Value : 0.7312
Precision : 0.7687
Recall : 0.7110
F1 : 0.7387
Prevalence : 0.5000
Detection Rate : 0.3555
Detection Prevalence : 0.4624
Balanced Accuracy : 0.7486
'Positive' Class : 0
cat("\n\n")
gbm_confusion_matrix_class1 <- confusionMatrix(gbm_predictions, true_labels, mode='everything', positive='1')
print(gbm_confusion_matrix_class1)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5057 1478
1 2012 5591
Accuracy : 0.7531
95% CI : (0.746, 0.7602)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5063
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.7909
Specificity : 0.7154
Pos Pred Value : 0.7354
Neg Pred Value : 0.7738
Precision : 0.7354
Recall : 0.7909
F1 : 0.7621
Prevalence : 0.5000
Detection Rate : 0.3955
Detection Prevalence : 0.5378
Balanced Accuracy : 0.7531
'Positive' Class : 1
cat("\n\n")
library(caret)
set.seed(1)
tr <- trainControl(method = "cv", number = 5)
svm_linear_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "svmLinear", trControl = tr)
svm_linear_model
Support Vector Machines with Linear Kernel
56554 samples
21 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243
Resampling results:
Accuracy Kappa
0.7472151 0.4944303
Tuning parameter 'C' was held constant at a value of 1
svm_linear_predictions <- predict(svm_linear_model, newdata = diabetes_test_data, na.action = na.pass)
svm_linear_confusion_matrix_class0 <- confusionMatrix(svm_linear_predictions, true_labels, mode='everything')
print(svm_linear_confusion_matrix_class0)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 4935 1474
1 2134 5595
Accuracy : 0.7448
95% CI : (0.7375, 0.752)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.4896
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.6981
Specificity : 0.7915
Pos Pred Value : 0.7700
Neg Pred Value : 0.7239
Precision : 0.7700
Recall : 0.6981
F1 : 0.7323
Prevalence : 0.5000
Detection Rate : 0.3491
Detection Prevalence : 0.4533
Balanced Accuracy : 0.7448
'Positive' Class : 0
cat("\n\n")
svm_linear_confusion_matrix_class1 <- confusionMatrix(svm_linear_predictions, true_labels, mode='everything', positive='1')
print(svm_linear_confusion_matrix_class1)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 5025 1472
1 2044 5597
Accuracy : 0.7513
95% CI : (0.7441, 0.7584)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5026
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.7918
Specificity : 0.7109
Pos Pred Value : 0.7325
Neg Pred Value : 0.7734
Precision : 0.7325
Recall : 0.7918
F1 : 0.7610
Prevalence : 0.5000
Detection Rate : 0.3959
Detection Prevalence : 0.5405
Balanced Accuracy : 0.7513
'Positive' Class : 1
cat("\n\n")
library(caret)
set.seed(1)
tr <- trainControl(method = "cv", number = 5)
svm_radial_model <- train(Diabetes_binary ~ ., data = diabetes_train_data, method = "svmRadial", trControl = tr)
svm_radial_model
Support Vector Machines with Radial Basis Function Kernel
56554 samples
21 predictor
2 classes: '0', '1'
No pre-processing
Resampling: Cross-Validated (5 fold)
Summary of sample sizes: 45244, 45242, 45244, 45243, 45243
Resampling results across tuning parameters:
C Accuracy Kappa
0.25 0.7512644 0.5025290
0.50 0.7515120 0.5030241
1.00 0.7508047 0.5016095
Tuning parameter 'sigma' was held constant at a value of 0.03579394
Accuracy was used to select the optimal model using the largest value.
The final values used for the model were sigma = 0.03579394 and C = 0.5.
svm_radial_predictions <- predict(svm_radial_model, newdata = diabetes_test_data, na.action = na.pass)
svm_radial_confusion_matrix_class0 <- confusionMatrix(svm_radial_predictions, true_labels, mode='everything')
print(svm_radial_confusion_matrix_class0)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 4867 1250
1 2202 5819
Accuracy : 0.7558
95% CI : (0.7487, 0.7629)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5117
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.6885
Specificity : 0.8232
Pos Pred Value : 0.7957
Neg Pred Value : 0.7255
Precision : 0.7957
Recall : 0.6885
F1 : 0.7382
Prevalence : 0.5000
Detection Rate : 0.3442
Detection Prevalence : 0.4327
Balanced Accuracy : 0.7558
'Positive' Class : 0
cat("\n\n")
svm_radial_confusion_matrix_class1 <- confusionMatrix(svm_radial_predictions, true_labels, mode='everything', positive='1')
print(svm_radial_confusion_matrix_class1)
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 4913 1356
1 2156 5713
Accuracy : 0.7516
95% CI : (0.7444, 0.7587)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5032
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.8082
Specificity : 0.6950
Pos Pred Value : 0.7260
Neg Pred Value : 0.7837
Precision : 0.7260
Recall : 0.8082
F1 : 0.7649
Prevalence : 0.5000
Detection Rate : 0.4041
Detection Prevalence : 0.5566
Balanced Accuracy : 0.7516
'Positive' Class : 1
cat("\n\n")
compare=resamples(list(KNN= knn_model, Lasso=lasso_model, Ridge=ridge_model, Enet=enet_model, RF=rf_model, GBM=gbm_model, SVML=svm_linear_model, SVMR=svm_radial_model))
summary(compare)
Call:
summary.resamples(object = compare)
Models: KNN, Lasso, Ridge, Enet, RF, GBM, SVML, SVMR
Number of resamples: 5
Accuracy
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
KNN 0.7167359 0.7174182 0.7217753 0.7202850 0.7221289 0.7233666 0
Lasso 0.7454031 0.7464415 0.7481874 0.7473743 0.7483865 0.7484527 0
Ridge 0.7435467 0.7441429 0.7470604 0.7465963 0.7478338 0.7503979 0
Enet 0.7453147 0.7463531 0.7481213 0.7473743 0.7485411 0.7485411 0
RF 0.7440771 0.7488286 0.7488948 0.7489127 0.7490938 0.7536693 0
GBM 0.7511272 0.7512821 0.7515693 0.7522368 0.7518564 0.7553492 0
SVML 0.7449611 0.7459995 0.7477677 0.7472151 0.7478338 0.7495137 0
SVMR 0.7477016 0.7503315 0.7503979 0.7515120 0.7519229 0.7572060 0
Kappa
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
KNN 0.4334757 0.4348364 0.4435539 0.4405701 0.4442544 0.4467301 0
Lasso 0.4908062 0.4928815 0.4963749 0.4947486 0.4967749 0.4969054 0
Ridge 0.4870934 0.4882843 0.4941224 0.4931927 0.4956676 0.5007958 0
Enet 0.4906294 0.4927047 0.4962444 0.4947486 0.4970822 0.4970822 0
RF 0.4881542 0.4976541 0.4977896 0.4978257 0.4981918 0.5073386 0
GBM 0.5022579 0.5025641 0.5031357 0.5044738 0.5037129 0.5106985 0
SVML 0.4899222 0.4920027 0.4955316 0.4944303 0.4956676 0.4990274 0
SVMR 0.4954031 0.5006585 0.5007958 0.5030241 0.5038513 0.5144120 0
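caret also ships lattice methods for resamples objects (lattice is already loaded above), which make the fold-by-fold comparison easier to scan than the printed summary (sketch):
bwplot(compare, metric = "Accuracy")
dotplot(compare, metric = "Accuracy")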
library(caret)
set.seed(1)  # for a reproducible split
parti_indices <- createDataPartition(diabetes_train_data$Diabetes_binary, p = 0.9, list = FALSE)
diabetes_index <- which(names(diabetes_train_data) == 'Diabetes_binary')
diabetes_train_data1 <- diabetes_train_data[parti_indices, -diabetes_index]
head(diabetes_train_data1)
training_labels <- diabetes_train_data[parti_indices, diabetes_index]
training_labels <- as.numeric(training_labels) - 1  # factor levels "0","1" map to 1,2; subtracting 1 recovers numeric 0/1
head(training_labels)
[1] 0 0 0 0 0 0
diabetes_validation_data <- diabetes_train_data[-parti_indices, -diabetes_index]
head(diabetes_validation_data)
validation_labels <- diabetes_train_data[-parti_indices, diabetes_index]
validation_labels <- as.numeric(validation_labels) - 1
head(validation_labels)
[1] 0 0 0 0 0 0
# Test predictors (label column removed) and numeric 0/1 test labels
diabetes_testing_data <- diabetes_test_data[, -diabetes_index]
diabetes_testing_data
test_labels <- as.numeric(diabetes_test_data[, diabetes_index]) - 1
test_labels
[ entries 1-1000 are all 0 ]
[ reached getOption("max.print") -- omitted 13138 entries ]
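For reference, a hypothetical reconstruction of the split assumed above: the proportions (80/20 train/test, then roughly 90/10 train/validation) are inferred from the printed object sizes (50900 / 5654 / 14138 rows), and the seed is an assumption, not taken from the original run.
library(caret)
# Hypothetical reconstruction of the earlier partitioning (sketch only)
diabetes_index <- which(names(diabetes_data) == "Diabetes_binary")
set.seed(42)                                    # assumed seed
train_idx <- createDataPartition(diabetes_data$Diabetes_binary,
                                 p = 0.8, list = FALSE)
diabetes_train_data <- diabetes_data[train_idx, ]
diabetes_test_data  <- diabetes_data[-train_idx, ]
parti_indices <- createDataPartition(diabetes_train_data$Diabetes_binary,
                                     p = 0.9, list = FALSE)
diabetes_train_data1 <- diabetes_train_data[parti_indices, -diabetes_index]
training_labels <- as.numeric(diabetes_train_data[parti_indices, diabetes_index]) - 1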
library(caret)
str(diabetes_train_data1)
'data.frame': 50900 obs. of 21 variables:
$ HighBP : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 2 2 2 ...
$ HighChol : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 1 ...
$ CholCheck : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ BMI : num 26 26 28 29 26 32 27 24 27 58 ...
$ Smoker : Factor w/ 2 levels "0","1": 1 1 2 2 2 1 2 2 1 1 ...
$ Stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ HeartDiseaseorAttack: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
$ PhysActivity : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
$ Fruits : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 2 2 2 ...
$ Veggies : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ HvyAlcoholConsump : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
$ AnyHealthcare : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ NoDocbcCost : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ GenHlth : num 3 1 3 2 1 3 3 3 2 3 ...
$ MentHlth : num 5 0 0 0 0 0 0 0 0 3 ...
$ PhysHlth : num 30 10 3 0 0 0 6 4 0 3 ...
$ DiffWalk : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Sex : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 1 2 2 ...
$ Age : num 4 13 11 8 13 3 6 12 7 10 ...
$ Education : num 6 6 6 5 5 6 4 4 6 4 ...
$ Income : num 8 8 8 8 6 8 4 6 8 6 ...
# Center and scale the numeric columns; factor columns pass through unchanged.
# (No imputation happens here; the "imputed" names are kept for consistency
# with the rest of the transcript.)
preproc <- preProcess(diabetes_train_data1, method = c("center", "scale"))
train_imputed <- predict(preproc, diabetes_train_data1)
str(train_imputed)
'data.frame': 50900 obs. of 21 variables:
$ HighBP : Factor w/ 2 levels "0","1": 2 1 2 1 1 1 1 2 2 2 ...
$ HighChol : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 1 ...
$ CholCheck : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ BMI : num -0.542 -0.542 -0.262 -0.121 -0.542 ...
$ Smoker : Factor w/ 2 levels "0","1": 1 1 2 2 2 1 2 2 1 1 ...
$ Stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ HeartDiseaseorAttack: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
$ PhysActivity : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 1 2 2 1 ...
$ Fruits : Factor w/ 2 levels "0","1": 1 2 2 2 2 2 2 2 2 2 ...
$ Veggies : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ HvyAlcoholConsump : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 1 1 1 1 ...
$ AnyHealthcare : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ NoDocbcCost : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ GenHlth : num 0.145 -1.649 0.145 -0.752 -1.649 ...
$ MentHlth : num 0.154 -0.46 -0.46 -0.46 -0.46 ...
$ PhysHlth : num 2.412 0.419 -0.278 -0.577 -0.577 ...
$ DiffWalk : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ Sex : Factor w/ 2 levels "0","1": 2 2 2 1 2 1 2 1 2 2 ...
$ Age : num -1.6 1.55 0.85 -0.2 1.55 ...
$ Education : num 1.049 1.049 1.049 0.0775 0.0775 ...
$ Income : num 1.061 1.061 1.061 1.061 0.142 ...
train_imputed
# Apply the training-set centering/scaling parameters to the test and
# validation sets, so no information from those sets leaks into preprocessing.
test_imputed <- predict(preproc, diabetes_testing_data)
test_imputed
val_imputed <- predict(preproc, diabetes_validation_data)
val_imputed
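As a quick sanity check (a sketch; the is.numeric() column selection is an assumption about the frame layout), the scaled training columns should show mean ~0 and sd ~1, while the test and validation columns will only be close to that, since they were transformed with the training-set parameters:
num_cols <- sapply(train_imputed, is.numeric)
round(colMeans(train_imputed[, num_cols]), 3)   # ~0 on the training set
round(sapply(train_imputed[, num_cols], sd), 3) # ~1 on the training set
round(colMeans(test_imputed[, num_cols]), 3)    # near, but not exactly, 0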
library(data.table)
library(mltools)
# One-hot encode the factor columns; dropUnusedLevels = FALSE keeps the dummy
# columns aligned across the train, test, and validation sets.
train_encoded <- one_hot(as.data.table(train_imputed), dropUnusedLevels = FALSE)
train_encoded
test_encoded <- one_hot(as.data.table(test_imputed), dropUnusedLevels = FALSE)
test_encoded
val_encoded <- one_hot(as.data.table(val_imputed), dropUnusedLevels = FALSE)
val_encoded
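A minimal check (not in the original run) that the three encoded tables ended up with identical column layouts, which the fixed input shape of the network below depends on:
stopifnot(identical(names(train_encoded), names(test_encoded)),
          identical(names(train_encoded), names(val_encoded)))
ncol(train_encoded)  # the input dimension fed to the network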
# nearZeroVar() with saveMetrics = TRUE returns a metrics data frame, not
# column positions; drop saveMetrics to get indices usable for subsetting.
nzv_indices <- nearZeroVar(train_encoded)
# data.table needs with = FALSE for column-index subsetting; guard against
# the case where no near-zero-variance columns are found.
if (length(nzv_indices) > 0) {
  train_encoded_nzv <- train_encoded[, -nzv_indices, with = FALSE]
  val_encoded_nzv   <- val_encoded[, -nzv_indices, with = FALSE]
  test_encoded_nzv  <- test_encoded[, -nzv_indices, with = FALSE]
}
train_encoded
val_encoded
test_encoded
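The saveMetrics = TRUE form remains useful for inspection; a short sketch of reading it (the returned frame has freqRatio, percentUnique, zeroVar, and nzv columns):
nzv_metrics <- nearZeroVar(train_encoded, saveMetrics = TRUE)
head(nzv_metrics)
# Names of the columns flagged as near-zero variance
rownames(nzv_metrics)[nzv_metrics$nzv]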
library(keras)
# Baseline network: two hidden ReLU layers with dropout, and a sigmoid output
# for binary classification.
model <- keras_model_sequential() %>%
  layer_dense(units = 32, activation = "relu", input_shape = dim(train_encoded)[2]) %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 1, activation = "sigmoid")
model %>% compile(
  loss = "binary_crossentropy",
  optimizer = "adam"
)
history <- model %>% fit(as.matrix(train_encoded), training_labels,
  epochs = 20,
  batch_size = 20, verbose = 2,
  validation_data = list(as.matrix(val_encoded), validation_labels)
)
Epoch 1/20
2545/2545 - 4s - loss: 0.5373 - val_loss: 0.4990 - 4s/epoch - 2ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5190 - val_loss: 0.5035 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5149 - val_loss: 0.4970 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5132 - val_loss: 0.4960 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5118 - val_loss: 0.4958 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5105 - val_loss: 0.4961 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5104 - val_loss: 0.4964 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5100 - val_loss: 0.4963 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5100 - val_loss: 0.4979 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5087 - val_loss: 0.4967 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5073 - val_loss: 0.4946 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5080 - val_loss: 0.4976 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5074 - val_loss: 0.4951 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5070 - val_loss: 0.4962 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5075 - val_loss: 0.4940 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5076 - val_loss: 0.4954 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5069 - val_loss: 0.4977 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5068 - val_loss: 0.4967 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5066 - val_loss: 0.5032 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5074 - val_loss: 0.4948 - 3s/epoch - 1ms/step
knitr::include_graphics("final_plot1.png")
predictions <- model %>% predict(as.matrix(test_encoded))
442/442 [==============================] - 0s 705us/step
# Threshold the predicted probabilities at 0.5 to get class labels
predicted_labels <- as.factor(ifelse(predictions <= 0.5, 0, 1)[, 1])
confusion_matrix_class0 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode = 'everything')
print(confusion_matrix_class0)
Confusion Matrix and Statistics
          Reference
Prediction    0    1
         0 5089 1499
         1 1980 5570
Accuracy : 0.7539
95% CI : (0.7467, 0.761)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5079
Mcnemar's Test P-Value : 4.021e-16
Sensitivity : 0.7199
Specificity : 0.7879
Pos Pred Value : 0.7725
Neg Pred Value : 0.7377
Precision : 0.7725
Recall : 0.7199
F1 : 0.7453
Prevalence : 0.5000
Detection Rate : 0.3600
Detection Prevalence : 0.4660
Balanced Accuracy : 0.7539
'Positive' Class : 0
confusion_matrix_class1 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything', positive='1')
print(confusion_matrix_class1)
Confusion Matrix and Statistics
          Reference
Prediction    0    1
         0 5089 1499
         1 1980 5570
Accuracy : 0.7539
95% CI : (0.7467, 0.761)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5079
Mcnemar's Test P-Value : 4.021e-16
Sensitivity : 0.7879
Specificity : 0.7199
Pos Pred Value : 0.7377
Neg Pred Value : 0.7725
Precision : 0.7377
Recall : 0.7879
F1 : 0.7620
Prevalence : 0.5000
Detection Rate : 0.3940
Detection Prevalence : 0.5340
Balanced Accuracy : 0.7539
'Positive' Class : 1
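Beyond the two confusion-matrix views, a threshold-free summary makes model comparison easier; a minimal sketch using the pROC package (an assumption — pROC is not loaded anywhere in this document):
library(pROC)
roc_obj <- roc(response = test_labels, predictor = predictions[, 1])
auc(roc_obj)  # area under the ROC curve for the baseline model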
library(keras)
library(tfruns)
# Random search: sample 0.001 of the 12,000 flag combinations (12 runs).
# The script defines units1/units2/units3, so the third units flag is units3.
runs <- tuning_run("tuning_script_5.R",
  flags = list(
    learning_rate = c(0.1, 0.5, 0.01, 0.001),
    units1 = c(8, 16, 32, 64, 128, 512),
    units2 = c(8, 16, 32, 64, 128),
    units3 = c(8, 16, 32, 64, 128),
    batch_size = c(8, 16, 32, 64),
    dropout = c(0.1, 0.2, 0.3, 0.4, 0.5)
  ),
  sample = 0.001
)
12,000 total combinations of flags
(sampled to 12 combinations)
Training run 1/12 (flags = list(0.01, 64, 8, 16, 16, 0.1))
Using run directory runs/2023-05-10T03-43-50Z
> FLAGS= flags(
+ flag_numeric("learning_rate", 0.01),
+ flag_numeric("units1", 32),
+ flag_numeric('units2', 16),
+ flag_numeric('units3', 8) .... [TRUNCATED]
> model2 <- keras_model_sequential() %>%
+ layer_dense(units = FLAGS$units1, activation = "relu",
+ input_shape = dim(train_encoded[]) .... [TRUNCATED]
> opt= optimizer_adam(learning_rate= FLAGS$learning_rate)
> model2 %>% compile(
+ loss = "binary_crossentropy",
+ optimizer = opt )
> history <- model2 %>% fit(as.matrix(train_encoded), training_labels,
+ epochs = 20,
+ batch_size = .... [TRUNCATED]
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0036s). Check your callbacks.
2545/2545 - 3s - loss: 0.5070 - val_loss: 0.4961 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5062 - val_loss: 0.4960 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5056 - val_loss: 0.4959 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 2s - loss: 0.5049 - val_loss: 0.4991 - 2s/epoch - 972us/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5055 - val_loss: 0.4972 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5049 - val_loss: 0.4966 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5053 - val_loss: 0.4962 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5039 - val_loss: 0.4964 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5053 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5054 - val_loss: 0.4978 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5044 - val_loss: 0.4972 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5049 - val_loss: 0.4971 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5045 - val_loss: 0.4980 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5036 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5044 - val_loss: 0.4978 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5049 - val_loss: 0.5000 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5043 - val_loss: 0.5012 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5039 - val_loss: 0.4979 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5043 - val_loss: 0.4973 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5045 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-43-50Z
Training run 2/12 (flags = list(0.1, 128, 16, 128, 8, 0.5))
Using run directory runs/2023-05-10T03-44-47Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0029s). Check your callbacks.
2545/2545 - 3s - loss: 0.5032 - val_loss: 0.4974 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5029 - val_loss: 0.4981 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5046 - val_loss: 0.4980 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5034 - val_loss: 0.4983 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5036 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5039 - val_loss: 0.4991 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5033 - val_loss: 0.4976 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5032 - val_loss: 0.4990 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5035 - val_loss: 0.4970 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5028 - val_loss: 0.5010 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5036 - val_loss: 0.4981 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5033 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5040 - val_loss: 0.4987 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5037 - val_loss: 0.4991 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5036 - val_loss: 0.4993 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5040 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5023 - val_loss: 0.4993 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5029 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5025 - val_loss: 0.4989 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-44-47Z
Training run 3/12 (flags = list(0.01, 32, 16, 32, 16, 0.3))
Using run directory runs/2023-05-10T03-45-44Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0031s). Check your callbacks.
2545/2545 - 4s - loss: 0.5028 - val_loss: 0.5007 - 4s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5028 - val_loss: 0.4990 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5029 - val_loss: 0.5055 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5024 - val_loss: 0.4978 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5029 - val_loss: 0.4980 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5024 - val_loss: 0.4980 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5019 - val_loss: 0.4985 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5019 - val_loss: 0.4973 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5033 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5023 - val_loss: 0.4999 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5023 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5023 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4978 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5023 - val_loss: 0.4983 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5028 - val_loss: 0.4988 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.5020 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5031 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5025 - val_loss: 0.5030 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5024 - val_loss: 0.4991 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5018 - val_loss: 0.4992 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-45-44Z
Training run 4/12 (flags = list(0.01, 16, 8, 32, 8, 0.4))
Using run directory runs/2023-05-10T03-46-43Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0028s). Check your callbacks.
2545/2545 - 3s - loss: 0.5016 - val_loss: 0.4983 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5018 - val_loss: 0.4998 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5027 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4966 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5028 - val_loss: 0.4982 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4999 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.4988 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5021 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5018 - val_loss: 0.5001 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.4991 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5013 - val_loss: 0.4974 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5018 - val_loss: 0.4985 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5024 - val_loss: 0.4979 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.4981 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.4981 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5024 - val_loss: 0.4974 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5010 - val_loss: 0.4990 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5025 - val_loss: 0.5016 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5013 - val_loss: 0.5002 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-46-43Z
Training run 5/12 (flags = list(0.001, 512, 64, 128, 32, 0.3))
Using run directory runs/2023-05-10T03-47-40Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0029s). Check your callbacks.
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.5003 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5016 - val_loss: 0.4982 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5012 - val_loss: 0.4992 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5017 - val_loss: 0.4988 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5011 - val_loss: 0.5000 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5023 - val_loss: 0.5003 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4984 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.4990 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5013 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4999 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5023 - val_loss: 0.4982 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5006 - val_loss: 0.5003 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5018 - val_loss: 0.4998 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5016 - val_loss: 0.5014 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5016 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5012 - val_loss: 0.4999 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4992 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5018 - val_loss: 0.4983 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-47-40Z
Training run 6/12 (flags = list(0.001, 8, 128, 64, 8, 0.1))
Using run directory runs/2023-05-10T03-48-37Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0009s vs `on_train_batch_end` time: 0.0032s). Check your callbacks.
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.4983 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5017 - val_loss: 0.5023 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5005 - val_loss: 0.4989 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5005 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.4989 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5005 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.4992 - val_loss: 0.4998 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.4990 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5011 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5005 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5008 - val_loss: 0.5004 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5009 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5025 - val_loss: 0.5004 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.5019 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5017 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.4992 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5014 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.4992 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-48-37Z
Training run 7/12 (flags = list(0.01, 32, 16, 8, 32, 0.5))
Using run directory runs/2023-05-10T03-49-35Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0030s). Check your callbacks.
2545/2545 - 3s - loss: 0.5018 - val_loss: 0.4991 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5009 - val_loss: 0.4980 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5006 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.5012 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5005 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5015 - val_loss: 0.4985 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.4987 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5005 - val_loss: 0.4993 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.4993 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.5016 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.4988 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5009 - val_loss: 0.4988 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.4988 - val_loss: 0.4992 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4998 - val_loss: 0.5005 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5004 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.5023 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-49-35Z
Training run 8/12 (flags = list(0.01, 512, 64, 16, 64, 0.2))
Using run directory runs/2023-05-10T03-50-32Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0029s). Check your callbacks.
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5000 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.5005 - val_loss: 0.5011 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5005 - val_loss: 0.5013 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.4998 - val_loss: 0.5033 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.4998 - val_loss: 0.5016 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5010 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5012 - val_loss: 0.5005 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5006 - val_loss: 0.4999 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5011 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5003 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.4991 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5006 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.5004 - val_loss: 0.4994 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5027 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5005 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-50-32Z
Training run 9/12 (flags = list(0.1, 128, 128, 64, 8, 0.3))
Using run directory runs/2023-05-10T03-51-29Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0009s vs `on_train_batch_end` time: 0.0030s). Check your callbacks.
2545/2545 - 3s - loss: 0.5004 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.4993 - val_loss: 0.5030 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5039 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.5008 - val_loss: 0.5004 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5011 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5032 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.4999 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.5006 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5024 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.4998 - val_loss: 0.5016 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5016 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.5002 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.5007 - val_loss: 0.4998 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5031 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.5001 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5020 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-51-29Z
Training run 10/12 (flags = list(0.1, 128, 128, 8, 64, 0.5))
Using run directory runs/2023-05-10T03-52-26Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0028s). Check your callbacks.
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5005 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.4992 - val_loss: 0.5003 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.4986 - val_loss: 0.5025 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5015 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.4984 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.4996 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5049 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.4999 - val_loss: 0.5003 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.4990 - val_loss: 0.5016 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5010 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.4989 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5013 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5040 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4980 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.4989 - val_loss: 0.5000 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.4988 - val_loss: 0.5021 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-52-26Z
Training run 11/12 (flags = list(0.1, 512, 8, 128, 16, 0.3))
Using run directory runs/2023-05-10T03-53-23Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0008s vs `on_train_batch_end` time: 0.0029s). Check your callbacks.
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.4998 - val_loss: 0.5017 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 3s - loss: 0.4992 - val_loss: 0.5030 - 3s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5007 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5029 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.5002 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 3s - loss: 0.5001 - val_loss: 0.5017 - 3s/epoch - 1ms/step
Epoch 9/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.4997 - 3s/epoch - 1ms/step
Epoch 10/20
2545/2545 - 3s - loss: 0.4997 - val_loss: 0.5008 - 3s/epoch - 1ms/step
Epoch 11/20
2545/2545 - 3s - loss: 0.5003 - val_loss: 0.4995 - 3s/epoch - 1ms/step
Epoch 12/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5027 - 3s/epoch - 1ms/step
Epoch 13/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5015 - 3s/epoch - 1ms/step
Epoch 14/20
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5010 - 3s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5011 - 3s/epoch - 1ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.4985 - val_loss: 0.5027 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 3s - loss: 0.4995 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4990 - val_loss: 0.5020 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.5000 - val_loss: 0.5012 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.4985 - val_loss: 0.5018 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-53-23Z
Training run 12/12 (flags = list(0.001, 64, 32, 128, 64, 0.2))
Using run directory runs/2023-05-10T03-54-21Z
Epoch 1/20
WARNING:tensorflow:Callback method `on_train_batch_end` is slow compared to the batch time (batch time: 0.0009s vs `on_train_batch_end` time: 0.0024s). Check your callbacks.
2545/2545 - 3s - loss: 0.4996 - val_loss: 0.5005 - 3s/epoch - 1ms/step
Epoch 2/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5020 - 3s/epoch - 1ms/step
Epoch 3/20
2545/2545 - 3s - loss: 0.4990 - val_loss: 0.5009 - 3s/epoch - 1ms/step
Epoch 4/20
2545/2545 - 4s - loss: 0.4996 - val_loss: 0.5005 - 4s/epoch - 1ms/step
Epoch 5/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5019 - 3s/epoch - 1ms/step
Epoch 6/20
2545/2545 - 3s - loss: 0.4994 - val_loss: 0.5006 - 3s/epoch - 1ms/step
Epoch 7/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5014 - 3s/epoch - 1ms/step
Epoch 8/20
2545/2545 - 4s - loss: 0.5001 - val_loss: 0.5016 - 4s/epoch - 2ms/step
Epoch 9/20
2545/2545 - 4s - loss: 0.4989 - val_loss: 0.5036 - 4s/epoch - 2ms/step
Epoch 10/20
2545/2545 - 4s - loss: 0.5002 - val_loss: 0.5013 - 4s/epoch - 2ms/step
Epoch 11/20
2545/2545 - 4s - loss: 0.4996 - val_loss: 0.5027 - 4s/epoch - 2ms/step
Epoch 12/20
2545/2545 - 4s - loss: 0.4992 - val_loss: 0.5016 - 4s/epoch - 2ms/step
Epoch 13/20
2545/2545 - 4s - loss: 0.4988 - val_loss: 0.5056 - 4s/epoch - 2ms/step
Epoch 14/20
2545/2545 - 4s - loss: 0.4993 - val_loss: 0.5020 - 4s/epoch - 1ms/step
Epoch 15/20
2545/2545 - 4s - loss: 0.5004 - val_loss: 0.5001 - 4s/epoch - 2ms/step
Epoch 16/20
2545/2545 - 3s - loss: 0.4991 - val_loss: 0.5025 - 3s/epoch - 1ms/step
Epoch 17/20
2545/2545 - 4s - loss: 0.4997 - val_loss: 0.5040 - 4s/epoch - 2ms/step
Epoch 18/20
2545/2545 - 3s - loss: 0.4993 - val_loss: 0.5020 - 3s/epoch - 1ms/step
Epoch 19/20
2545/2545 - 3s - loss: 0.4978 - val_loss: 0.5073 - 3s/epoch - 1ms/step
Epoch 20/20
2545/2545 - 3s - loss: 0.4987 - val_loss: 0.5010 - 3s/epoch - 1ms/step
Run completed: runs/2023-05-10T03-54-21Z
view_run(runs$run_dir[1])
Warning: incomplete final line found on '/var/folders/fk/7yp8kndx0634lxz4zzv85g9w0000gp/T//RtmpIweksP/fileaf661d4a828f/source/tuning_script_1.R'
Warning: incomplete final line found on '/var/folders/fk/7yp8kndx0634lxz4zzv85g9w0000gp/T//RtmpIweksP/fileaf661d4a828f/source/tuning_script_2.R'
Warning: incomplete final line found on '/var/folders/fk/7yp8kndx0634lxz4zzv85g9w0000gp/T//RtmpIweksP/fileaf661d4a828f/source/tuning_script_3.R'
knitr::include_graphics("final_plot2.png")
# Rough overfitting check on the first listed run: a final training loss that
# is still above the validation loss suggests the model has not fit the
# training data more closely than unseen data.
if (runs[1, ]$metric_loss > runs[1, ]$metric_val_loss) {
  print("The model doesn't overfit.")
} else {
  print("The model overfits.")
}
[1] "The model overfits."
library(keras)
library(tfruns)
# Rebuild the best configuration found by the search and retrain it on the
# combined training + validation data before the final test evaluation.
best_model <- keras_model_sequential() %>%
  layer_dense(units = 512, activation = "relu", input_shape = dim(train_encoded)[2]) %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 16, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 8, activation = "relu") %>%
  layer_dropout(rate = 0.2) %>%
  layer_dense(units = 1, activation = "sigmoid")
opt <- optimizer_adam(learning_rate = 0.01)
best_model %>% compile(
  loss = "binary_crossentropy",
  optimizer = opt)
combined_train_x <- rbind(train_encoded, val_encoded)
combined_train_y <- c(training_labels, validation_labels)
history <- best_model %>% fit(as.matrix(combined_train_x),
  combined_train_y,
  batch_size = 64,
  epochs = 20, verbose = 2)
Epoch 1/20
884/884 - 3s - loss: 0.5387 - 3s/epoch - 4ms/step
Epoch 2/20
884/884 - 2s - loss: 0.5285 - 2s/epoch - 2ms/step
Epoch 3/20
884/884 - 1s - loss: 0.5266 - 1s/epoch - 2ms/step
Epoch 4/20
884/884 - 1s - loss: 0.5268 - 1s/epoch - 2ms/step
Epoch 5/20
884/884 - 2s - loss: 0.5246 - 2s/epoch - 2ms/step
Epoch 6/20
884/884 - 2s - loss: 0.5264 - 2s/epoch - 2ms/step
Epoch 7/20
884/884 - 1s - loss: 0.5245 - 1s/epoch - 2ms/step
Epoch 8/20
884/884 - 1s - loss: 0.5245 - 1s/epoch - 2ms/step
Epoch 9/20
884/884 - 1s - loss: 0.5271 - 1s/epoch - 2ms/step
Epoch 10/20
884/884 - 2s - loss: 0.5241 - 2s/epoch - 2ms/step
Epoch 11/20
884/884 - 2s - loss: 0.5243 - 2s/epoch - 2ms/step
Epoch 12/20
884/884 - 1s - loss: 0.5239 - 1s/epoch - 2ms/step
Epoch 13/20
884/884 - 2s - loss: 0.5244 - 2s/epoch - 2ms/step
Epoch 14/20
884/884 - 1s - loss: 0.5252 - 1s/epoch - 2ms/step
Epoch 15/20
884/884 - 2s - loss: 0.5236 - 2s/epoch - 2ms/step
Epoch 16/20
884/884 - 2s - loss: 0.5229 - 2s/epoch - 2ms/step
Epoch 17/20
884/884 - 1s - loss: 0.5233 - 1s/epoch - 2ms/step
Epoch 18/20
884/884 - 1s - loss: 0.5236 - 1s/epoch - 2ms/step
Epoch 19/20
884/884 - 2s - loss: 0.5238 - 2s/epoch - 2ms/step
Epoch 20/20
884/884 - 2s - loss: 0.5247 - 2s/epoch - 2ms/step
best_model_predictions <- best_model %>% predict(as.matrix(test_encoded))
442/442 [==============================] - 0s 831us/step
# Threshold the tuned model's probabilities at 0.5
predicted_labels <- as.factor(ifelse(best_model_predictions <= 0.5, 0, 1)[, 1])
confusion_matrix_class0 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode = 'everything')
print(confusion_matrix_class0)
Confusion Matrix and Statistics
          Reference
Prediction    0    1
         0 4910 1336
         1 2159 5733
Accuracy : 0.7528
95% CI : (0.7456, 0.7599)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5056
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.6946
Specificity : 0.8110
Pos Pred Value : 0.7861
Neg Pred Value : 0.7264
Precision : 0.7861
Recall : 0.6946
F1 : 0.7375
Prevalence : 0.5000
Detection Rate : 0.3473
Detection Prevalence : 0.4418
Balanced Accuracy : 0.7528
'Positive' Class : 0
confusion_matrix_class1 <- confusionMatrix(predicted_labels, as.factor(test_labels), mode='everything', positive='1')
print(confusion_matrix_class1)
Confusion Matrix and Statistics
          Reference
Prediction    0    1
         0 4910 1336
         1 2159 5733
Accuracy : 0.7528
95% CI : (0.7456, 0.7599)
No Information Rate : 0.5
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.5056
Mcnemar's Test P-Value : < 2.2e-16
Sensitivity : 0.8110
Specificity : 0.6946
Pos Pred Value : 0.7264
Neg Pred Value : 0.7861
Precision : 0.7264
Recall : 0.8110
F1 : 0.7664
Prevalence : 0.5000
Detection Rate : 0.4055
Detection Prevalence : 0.5582
Balanced Accuracy : 0.7528
'Positive' Class : 1
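The headline numbers can also be pulled programmatically from the caret object, which is handy when tabulating several models; a small sketch using fields that confusionMatrix(..., mode = 'everything') populates:
# Key metrics for the tuned model, positive class = "1"
confusion_matrix_class1$overall[c("Accuracy", "Kappa")]
confusion_matrix_class1$byClass[c("Precision", "Recall", "F1", "Balanced Accuracy")]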
knitr::include_graphics("final_plot3.png")